library(rjson)
library(stringr)
library(stringi)
library(ggplot2)
library(R.utils)
library(multiwayvcov)
library(lmtest)
library(stm)
library(stopwords)
library(data.table)
library(stargazer)
library(here)
library(sentimentr)
library(lubridate)

# Run from the released-data folder so the relative path below resolves.
setwd(here("release_data"))

# Restore the saved workspace; `immigrFit` and `out`, used throughout the
# script, are expected to come from this file.
load("TopicModel.RData")


##############################
##############################
# BASIC INFO
##############################
##############################

### TABLE A1 ###
# Topic labels for the fitted model; printed to the console for the table.
labs <- labelTopics(immigrFit)
labs

### FIGURE 1 ###
par(mar = c(1, 1, 1, 1))

# Plot the text of the top-ranked document that findThoughts() returns for
# one topic.
plot_top_quote <- function(topic_id) {
  exemplar <- findThoughts(immigrFit, texts = out$meta$txt, topics = c(topic_id), n = 1)
  plotQuote(unique(exemplar$docs[[1]]), width = 50, text.cex = 1, main = "")
}

plot_top_quote(1)
plot_top_quote(3)
plot_top_quote(13)

### TABLE A6 ###

# Regress the prevalence of each of the 30 topics on period, channel and
# their interaction.
prep <- estimateEffect(1:30 ~ time + channel + time:channel, immigrFit, meta = out$meta)

# stargazer cannot format estimateEffect output directly, so fit a throwaway
# lm with the same right-hand side and overwrite its statistics below.
# (The response expression is left untouched because stargazer prints it as
# the dependent-variable label.)
placeholder <- lm(1:nrow(out$meta) ~ time + channel + time:channel, data = out$meta)

# Summarise ONCE and reuse the result. summary() on an estimateEffect object
# simulates from the fitted parameters on every call, so taking the
# coefficient, SE, t and p columns from separate summary() calls would mix
# values from different draws.
# Topics were estimated as 1:30, so list position i is topic i.
prep_tables <- summary(prep)$tables

# Print a stargazer table whose coefficient, standard error, t statistic and
# p-value ALL come from the regression table of the same topic. (The earlier
# version paired topic 3 coefficients with topic 1 SE/t/p, and topic 13
# coefficients with topic 8 SE/t/p.)
print_topic_table <- function(topic) {
  tab <- prep_tables[[topic]]
  stargazer(
    placeholder,
    coef = list(tab[, 1]),
    se = list(tab[, 2]),
    t = list(tab[, 3]),
    p = list(tab[, 4]),
    single.row = TRUE
  )
}

# Topic 1
prep_tables[[1]]
print_topic_table(1)

# Topic 3
prep_tables[[3]]
print_topic_table(3)

# Topic 13
prep_tables[[13]]
print_topic_table(13)

# Three-column shell with the placeholder model repeated in every column.
stargazer(placeholder, placeholder, placeholder, single.row = TRUE)

##############################
##############################
# TOPICS OVER TIME
##############################
##############################

# Per-document topic proportions, used to build daily topic totals.
dt <- make.dt(immigrFit)

# Crime coverage = topics 1 + 3; welfare coverage = topic 13.
dt$crime <- dt$Topic1 + dt$Topic3
dt$welfare <- dt$Topic13

dt <- dt[, c("docnum", "crime", "welfare")]

# Attach the topic measures to the document metadata.
meta <- cbind(out$meta, dt)

# Number of immigration segments per date.
meta$date <- as.Date(meta$date)

# Full date x channel grid, so days without any segment show up as zeros
# after the outer merge below.
all_days <- seq(from = min(meta$date), to = max(meta$date), by = 1)
channel_names <- c("fox", "msnbc", "cnn")
unique_dates <- data.frame(
  Date = rep(all_days, times = length(channel_names)),
  channel = rep(channel_names, each = length(all_days)),
  stringsAsFactors = FALSE
)

# Count segments per day and channel (any complete column works for length).
num_immigr <- aggregate(meta$welfare, list(meta$date, meta$channel), length)
colnames(num_immigr) <- c("Date", "channel", "n")

num_immigr <- merge(num_immigr, unique_dates, by = c("Date", "channel"), all = TRUE)
no_segments <- is.na(num_immigr$n)
num_immigr$n[no_segments] <- 0

num_immigr$Date <- as.Date(num_immigr$Date)
# First day of the month, kept as a string until the monthly aggregation.
num_immigr$mo_yr <- gsub("-\\d\\d$", "-01", num_immigr$Date)


# Period boundaries used for the indicators below.
campaign_start <- as.Date("2015-06-16")
inauguration <- as.Date("2017-01-20")

num_immigr$post_election <- ifelse(num_immigr$Date >= inauguration, 1, 0)
num_immigr$post_trump <- ifelse(
  num_immigr$Date >= campaign_start & num_immigr$Date <= inauguration,
  1, 0
)

# The two windows overlap on 2017-01-20; that day ends up "post-election"
# because that label is assigned last.
num_immigr$time <- "pre-election"
num_immigr$time[num_immigr$post_trump == 1] <- "election"
num_immigr$time[num_immigr$post_election == 1] <- "post-election"

######### FIGURE 2 ##########

# Monthly segment totals within channel and period.
num_immigr <- aggregate(
  num_immigr$n,
  list(num_immigr$mo_yr, num_immigr$channel, num_immigr$time),
  sum
)
colnames(num_immigr) <- c("Date", "channel", "time", "n")
num_immigr$Date <- as.Date(num_immigr$Date)

# Blank the date of MSNBC months before 2015 so they are left off the plot
# (presumably MSNBC transcripts are unavailable that early -- confirm).
# The counts themselves stay in place and still enter the means below.
early_msnbc <- num_immigr$Date < as.Date("2015-01-01") & num_immigr$channel == "msnbc"
num_immigr$Date[early_msnbc] <- NA

# Mean monthly segments by period and channel.
aggregate(num_immigr$n, list(num_immigr$time, num_immigr$channel), mean)

ggplot(num_immigr, aes(x = Date, y = n, color = channel, group = paste(channel, time))) +
  geom_point() +
  stat_smooth(se = FALSE) +
  scale_color_manual(values = c("magenta", "firebrick3", "royalblue3")) +
  xlab(NULL) +
  ylab("Num Monthly\nImmigration Segs") +
  geom_vline(xintercept = as.Date("2015-06-16"), linetype = "dashed") +
  geom_vline(xintercept = as.Date("2017-01-20"), linetype = "dashed") +
  geom_label(x = as.Date("2014-08-01"), y = 1500, label = "Pre-Campaign", size = 3, show.legend = FALSE, color = "black") +
  geom_label(x = as.Date("2016-05-01"), y = 1500, label = "Campaign", size = 3, show.legend = FALSE, color = "black") +
  geom_label(x = as.Date("2018-06-01"), y = 1500, label = "Post-Inauguration", size = 3, show.legend = FALSE, color = "black") +
  theme(legend.position = "bottom", legend.title = element_blank(), axis.title = element_text(size = 8))


# New coverage measure: summed topic proportion per channel and day.
meta$crime_dur <- meta$crime
meta$welf_dur <- meta$welfare
meta$trump_dur <- meta$trump

# Sum one measure per channel and day, then pad with the full date x channel
# grid so days without segments count as zero coverage.
daily_total <- function(values, value_name) {
  totals <- aggregate(values, list(meta$channel, meta$date), sum)
  colnames(totals) <- c("channel", "date", value_name)
  totals$date <- as.Date(totals$date)
  totals <- merge(
    totals, unique_dates,
    by.x = c("date", "channel"), by.y = c("Date", "channel"),
    all = TRUE
  )
  totals[[value_name]][is.na(totals[[value_name]])] <- 0
  totals
}

# Add the period indicators and the three-level `time` label. Both windows
# include 2017-01-20; "post-election" is assigned last and so wins that day.
add_period_labels <- function(daily) {
  daily$post_election <- ifelse(daily$date >= as.Date("2017-01-20"), 1, 0)
  daily$post_trump <- ifelse(
    daily$date >= as.Date("2015-06-16") & daily$date <= as.Date("2017-01-20"),
    1, 0
  )
  daily$time <- "pre-election"
  daily$time[daily$post_trump == 1] <- "election"
  daily$time[daily$post_election == 1] <- "post-election"
  daily
}

# Collapse a daily table to monthly totals by channel and period, and set
# the value of MSNBC months before 2015 to NA.
monthly_total <- function(daily, value_name) {
  monthly <- aggregate(
    daily[[value_name]],
    list(daily$mo_yr, daily$channel, daily$time),
    sum
  )
  colnames(monthly) <- c("Date", "channel", "time", "n")
  monthly$Date <- as.Date(monthly$Date)
  monthly$n[monthly$Date < as.Date("2015-01-01") & monthly$channel == "msnbc"] <- NA
  monthly
}

# Daily tables with period labels.
crime_dur <- add_period_labels(daily_total(meta$crime_dur, "crime_news"))
welf_dur <- add_period_labels(daily_total(meta$welf_dur, "welfare_news"))
trump_dur <- add_period_labels(daily_total(meta$trump_dur, "trump_news"))

# First-of-month key for the monthly roll-up (crime and welfare only).
crime_dur$mo_yr <- gsub("-\\d\\d$", "-01", crime_dur$date)
welf_dur$mo_yr <- gsub("-\\d\\d$", "-01", welf_dur$date)

crime_agg <- monthly_total(crime_dur, "crime_news")
welf_agg <- monthly_total(welf_dur, "welfare_news")

######### FIGURE 3 ##########

# Monthly coverage by channel with a separate smoother per channel and period.
# `label_y` is the height at which the three period labels are drawn.
plot_monthly_coverage <- function(monthly, y_title, label_y) {
  ggplot(monthly, aes(x = Date, y = n, color = channel, group = paste(time, channel))) +
    geom_point() +
    stat_smooth(se = FALSE) +
    scale_color_manual(values = c("magenta", "firebrick3", "royalblue3")) +
    xlab(NULL) +
    ylab(y_title) +
    geom_vline(xintercept = as.Date("2015-06-16"), linetype = "dashed") +
    geom_vline(xintercept = as.Date("2017-01-20"), linetype = "dashed") +
    geom_label(x = as.Date("2014-08-01"), y = label_y, label = "Pre-Campaign", size = 3, show.legend = FALSE, color = "black") +
    geom_label(x = as.Date("2016-05-01"), y = label_y, label = "Campaign", size = 3, show.legend = FALSE, color = "black") +
    geom_label(x = as.Date("2018-06-01"), y = label_y, label = "Post-Inauguration", size = 3, show.legend = FALSE, color = "black") +
    theme(legend.position = "bottom", legend.title = element_blank(), axis.title = element_text(size = 8))
}

# Crime and welfare coverage by month.
plot_monthly_coverage(crime_agg, "Immigr + Crime \nNews Coverage", 120)
plot_monthly_coverage(welf_agg, "Immigr Welfare \nNews Coverage", 45)


##############################
##############################
# EMOTIONS
##############################
##############################


tmp <- out$meta$txt

# Remove the immigration terms themselves before scoring (presumably so the
# terms that define the segments do not feed into the emotion scores).
# The last alternative used to read "llegal alien\\w*", which left a stray
# "i" behind every time "illegal alien(s)" was removed; it now matches the
# whole phrase.
# Older base-R variant of the same replacement:
# tmp <- gsub("immigr\\w*|illegal immigr\\w*|illegals|illegal alien\\w*","", tmp, ignore.case = TRUE)
search_terms <- "immigr\\w*|illegal immigr\\w*|illegals|illegal alien\\w*"
tmp <- stri_replace_all_regex(tmp, search_terms, "", case_insensitive = TRUE)

# Sentence-split, then score each sentence on every emotion type.
tmp <- get_sentences(tmp)
emotions <- sentimentr::emotion(tmp)

# Average the sentence-level scores within document and emotion type.
agg <- aggregate(emotions$emotion, list(emotions$element_id, emotions$emotion_type), mean)

colnames(agg) <- c("doc_id", "emotion", "mean")

# element_id is the position of the text in `tmp`, i.e. the row of out$meta;
# seq_len() keeps this safe if the metadata were ever empty.
out$meta$id <- seq_len(nrow(out$meta))
covars <- out$meta[, c("id", "date", "channel", "txt")]

agg <- merge(agg, covars, by.x = "doc_id", by.y = "id")

agg$date <- as.Date(agg$date)

# Convert to month: snap every date to the first day of its month.
day(agg$date) <- 1

######## FIGURE A1 ########

# Summarise one emotion's per-document scores by month and channel with
# `stat`, returning columns date, channel, mean and emotion.
monthly_emotion <- function(emotion_type, stat) {
  keep <- agg$emotion == emotion_type
  res <- aggregate(agg$mean[keep], list(agg$date[keep], agg$channel[keep]), stat)
  colnames(res) <- c("date", "channel", "mean")
  res$emotion <- emotion_type
  res
}

# Mean score per segment for each negative emotion.
anger <- monthly_emotion("anger", mean)
fear <- monthly_emotion("fear", mean)
sad <- monthly_emotion("sadness", mean)
disg <- monthly_emotion("disgust", mean)

neg <- rbind(anger, fear, disg, sad)

neg$date <- as.Date(neg$date)

# Period indicators; 2017-01-20 falls in both windows and ends up
# "post-election" because that label is assigned last.
neg$post_election <- ifelse(neg$date >= as.Date("2017-01-20"), 1, 0)
neg$post_trump <- ifelse(
  neg$date >= as.Date("2015-06-16") & neg$date <= as.Date("2017-01-20"),
  1, 0
)

neg$time <- "pre-election"
neg$time[neg$post_trump == 1] <- "election"
neg$time[neg$post_election == 1] <- "post-election"

# Drop rows whose value is exactly zero (the null MSNBC month).
neg <- neg[neg$mean != 0, ]

ggplot(neg, aes(x = date, y = mean, color = channel, group = paste(channel, time))) +
  geom_point() +
  stat_smooth(se = FALSE) +
  scale_color_manual(values = c("magenta", "firebrick3", "royalblue3")) +
  xlab(NULL) +
  ylab("Mean Emotion per Seg") +
  geom_vline(xintercept = as.Date("2015-06-16"), linetype = "dashed") +
  geom_vline(xintercept = as.Date("2017-01-20"), linetype = "dashed") +
  geom_label(x = as.Date("2014-08-01"), y = 0.03, label = "Pre-Campaign", size = 3, show.legend = FALSE, color = "black") +
  geom_label(x = as.Date("2016-05-01"), y = 0.03, label = "Campaign", size = 3, show.legend = FALSE, color = "black") +
  geom_label(x = as.Date("2018-06-01"), y = 0.03, label = "Post-Inauguration", size = 3, show.legend = FALSE, color = "black") +
  theme(legend.position = "bottom", legend.title = element_blank(), axis.title = element_text(size = 8)) +
  facet_wrap(~emotion)

######## FIGURE A2 ########

# Summarise one emotion's per-document scores by month and channel with
# `stat`, returning columns date, channel, mean and emotion.
monthly_emotion <- function(emotion_type, stat) {
  keep <- agg$emotion == emotion_type
  res <- aggregate(agg$mean[keep], list(agg$date[keep], agg$channel[keep]), stat)
  colnames(res) <- c("date", "channel", "mean")
  res$emotion <- emotion_type
  res
}

# Total score per month for each negative emotion. The value column keeps the
# name "mean" even though it holds sums here.
anger <- monthly_emotion("anger", sum)
fear <- monthly_emotion("fear", sum)
sad <- monthly_emotion("sadness", sum)
disg <- monthly_emotion("disgust", sum)

neg <- rbind(anger, fear, disg, sad)

neg$date <- as.Date(neg$date)

# Period indicators; 2017-01-20 falls in both windows and ends up
# "post-election" because that label is assigned last.
neg$post_election <- ifelse(neg$date >= as.Date("2017-01-20"), 1, 0)
neg$post_trump <- ifelse(
  neg$date >= as.Date("2015-06-16") & neg$date <= as.Date("2017-01-20"),
  1, 0
)

neg$time <- "pre-election"
neg$time[neg$post_trump == 1] <- "election"
neg$time[neg$post_election == 1] <- "post-election"

# Drop rows whose value is exactly zero (the null MSNBC month).
neg <- neg[neg$mean != 0, ]

ggplot(neg, aes(x = date, y = mean, color = channel, group = paste(channel, time))) +
  geom_point() +
  stat_smooth(se = FALSE) +
  scale_color_manual(values = c("magenta", "firebrick3", "royalblue3")) +
  xlab(NULL) +
  ylab("Total Emotion") +
  geom_vline(xintercept = as.Date("2015-06-16"), linetype = "dashed") +
  geom_vline(xintercept = as.Date("2017-01-20"), linetype = "dashed") +
  geom_label(x = as.Date("2014-08-01"), y = 25, label = "Pre-Campaign", size = 3, show.legend = FALSE, color = "black") +
  geom_label(x = as.Date("2016-05-01"), y = 25, label = "Campaign", size = 3, show.legend = FALSE, color = "black") +
  geom_label(x = as.Date("2018-06-01"), y = 25, label = "Post-Inauguration", size = 3, show.legend = FALSE, color = "black") +
  theme(legend.position = "bottom", legend.title = element_blank(), axis.title = element_text(size = 8)) +
  facet_wrap(~emotion)

# Write the per-document emotion table to the output folder.
setwd(here("output"))
write.csv(agg, file = "emotions_immigr.csv")